
# 下载LXML库
# PS E:\reptile-python> pip install lxml
# Requirement already satisfied: lxml in c:\program files\python310\lib\site-packages (4.8.0)

# 导包
import urllib.request

import lxml.etree
from lxml import etree


# To parse a local HTML file instead of a web page, use something like:
# HTML_TREE = etree.parse("./xxx.html", etree.HTMLParser())

# Read the web page.
# etree.HTML() parses a string/bytes of markup; it does NOT download URLs.
# Passing the URL itself would build a tree that only contains the URL text,
# so the page body has to be fetched first and then handed to the parser.
_PAGE_URL = 'https://www.baidu.com/s'
_request = urllib.request.Request(
    _PAGE_URL,
    # NOTE(review): a browser-like User-Agent is assumed to be needed for the
    # site to serve the regular page to a script -- confirm and adjust.
    headers={'User-Agent': 'Mozilla/5.0'},
)
with urllib.request.urlopen(_request, timeout=10) as _response:
    # Hand the raw bytes to lxml so the parser decides how to decode them.
    HTML_TREE = etree.HTML(_response.read())
print(HTML_TREE)

# Select every <div> element anywhere below <body>.
divList = HTML_TREE.xpath('//body//div')
print(divList)

# Select the <li> items under a <ul> that have an id attribute and take
# their text content.
divList = HTML_TREE.xpath('//ul//li[@id]/text()')

# Take the value of an element's class attribute.
divList = HTML_TREE.xpath('//ul//li[@id="i1"]/@class')

# Fuzzy match: <li> elements whose id attribute contains "i1"; take their
# text. contains() is an XPath function and must be called as
# contains(haystack, needle).
divList = HTML_TREE.xpath('//ul//li[contains(@id, "i1")]/text()')

# <li> elements whose id attribute starts with "i1"; take their text.
# The XPath 1.0 function is named starts-with (note the trailing "s").
divList = HTML_TREE.xpath('//ul//li[starts-with(@id, "i1")]/text()')

# Combine several conditions with "and".
divList = HTML_TREE.xpath('//ul//li[@id="i1" and @class="ss"]/text()')